{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb5fcf3c-a043-46ec-8999-ad7c181da58c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import sklearn \n",
    "import openpyxl\n",
    "import warnings\n",
    "from functools import reduce\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55e43c00-2ee0-41b0-9ba3-ccd7d4e6a56d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import folium\n",
    "import json\n",
    "from folium.plugins import MarkerCluster\n",
    "from pathlib import Path\n",
    "import geopandas as gpd\n",
    "from matplotlib import cm, colors\n",
    "import os\n",
    "from scipy.stats import linregress\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8589164e-26f8-48dd-97b9-8d71a146ca7f",
   "metadata": {},
   "source": [
    "# Figure 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a509545-99be-4175-a5fd-6f98aa0df3e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the input table for Figure 1 (df_gu.xlsx).\n",
    "gu_table_path = r\"E:\\불평등 연구\\데이터\\59_출퇴근불평등\\map\\df_gu.xlsx\"\n",
    "df_1 = pd.read_excel(gu_table_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4c416a5-eea3-4a8b-bcc5-75b2865bdbf2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collapse rows that share the same 'name' by summing them; 'name' becomes the index.\n",
    "# NOTE(review): APT_price is summed as well, while the plot labels it 'Average APT price' --\n",
    "# confirm that sum (not mean) is intended if a name can occur on more than one row.\n",
    "rows_by_name = df_1.groupby('name')\n",
    "df_1 = rows_by_name.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1911f61-86c3-4e08-9a4a-009a325e492e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d684f9d6-94f7-4576-96eb-825c5b71d60b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 1 (plain version): APT price vs. inbound population, one point per row of df_1.\n",
    "# NOTE: the annotated version in a later cell saves to the same \"Figure_1.jpg\" and\n",
    "# therefore overwrites the file written here.\n",
    "fig, ax = plt.subplots(figsize=(10, 4))  # smaller width/height to reduce margin\n",
    "\n",
    "# Marker area and colour both encode inbound population; keep the handle so the\n",
    "# colorbar is bound to this artist instead of pyplot's implicit \"current mappable\".\n",
    "points = ax.scatter(\n",
    "    df_1['APT_price'],\n",
    "    df_1['Inbound_population'],\n",
    "    s=df_1['Inbound_population'] * 0.00005,\n",
    "    c=df_1['Inbound_population'],\n",
    "    cmap='viridis',\n",
    "    alpha=0.5\n",
    ")\n",
    "\n",
    "# Regression line only (scatter=False). The confidence band is bootstrapped, so a\n",
    "# fixed seed keeps the saved figure identical between runs.\n",
    "sns.regplot(\n",
    "    x='APT_price',\n",
    "    y='Inbound_population',\n",
    "    data=df_1,\n",
    "    scatter=False,\n",
    "    color='red',\n",
    "    seed=0,\n",
    "    ax=ax\n",
    ")\n",
    "\n",
    "ax.set_xlabel('Average APT price', size=12)\n",
    "ax.set_ylabel('Inbound Population (Home → Work)', size=12)\n",
    "# tick_params sizes every tick label, including ones created after this call.\n",
    "ax.tick_params(axis='both', labelsize=12)\n",
    "\n",
    "# Label each point with its index value, nudged 1% to the right of the marker.\n",
    "for name, price, inbound in zip(df_1.index, df_1['APT_price'], df_1['Inbound_population']):\n",
    "    ax.text(price * 1.01, inbound, name, fontsize=8)\n",
    "\n",
    "# Colorbar for the scatter colours\n",
    "cbar = fig.colorbar(points, ax=ax)\n",
    "cbar.ax.tick_params(labelsize=10)\n",
    "cbar.set_label('Inbound population', size=12)\n",
    "\n",
    "ax.set_title(\"APT Price unit: 1,000KRW\", loc='right', size=10)\n",
    "ax.grid(True)\n",
    "\n",
    "# Tighten margins automatically\n",
    "fig.tight_layout()\n",
    "\n",
    "# Save with bbox_inches='tight' to remove extra white space\n",
    "fig.savefig(\"Figure_1.jpg\", dpi=600, bbox_inches='tight')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fbf703f-b2e4-4953-aa64-dfffc15c3ae3",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b3b6a42-f241-400d-a654-9ca48ab43a01",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 1 (annotated version): same scatter + regression line as the plain version,\n",
    "# with the slope and p-value of a simple linear regression printed on the axes.\n",
    "# This cell writes \"Figure_1.jpg\", replacing any file of that name saved earlier.\n",
    "\n",
    "# Run regression to get coef and p-value\n",
    "slope, intercept, r_value, p_value, std_err = linregress(\n",
    "    df_1['APT_price'], df_1['Inbound_population']\n",
    ")\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(10, 4))  # smaller width/height to reduce margin\n",
    "\n",
    "# Marker area and colour both encode inbound population; keep the handle so the\n",
    "# colorbar is bound to this artist instead of pyplot's implicit \"current mappable\".\n",
    "points = ax.scatter(\n",
    "    df_1['APT_price'],\n",
    "    df_1['Inbound_population'],\n",
    "    s=df_1['Inbound_population'] * 0.00005,\n",
    "    c=df_1['Inbound_population'],\n",
    "    cmap='viridis',\n",
    "    alpha=0.5\n",
    ")\n",
    "\n",
    "# Regression line only (scatter=False). The confidence band is bootstrapped, so a\n",
    "# fixed seed keeps the saved figure identical between runs.\n",
    "sns.regplot(\n",
    "    x='APT_price',\n",
    "    y='Inbound_population',\n",
    "    data=df_1,\n",
    "    scatter=False,\n",
    "    color='red',\n",
    "    seed=0,\n",
    "    ax=ax\n",
    ")\n",
    "\n",
    "ax.set_xlabel('Average APT price', size=12)\n",
    "ax.set_ylabel('Inbound Population (Home → Work)', size=12)\n",
    "# tick_params sizes every tick label, including ones created after this call.\n",
    "ax.tick_params(axis='both', labelsize=12)\n",
    "\n",
    "# Label each point with its index value, nudged 1% to the right of the marker.\n",
    "for name, price, inbound in zip(df_1.index, df_1['APT_price'], df_1['Inbound_population']):\n",
    "    ax.text(price * 1.01, inbound, name, fontsize=8)\n",
    "\n",
    "# Colorbar for the scatter colours\n",
    "cbar = fig.colorbar(points, ax=ax)\n",
    "cbar.ax.tick_params(labelsize=10)\n",
    "cbar.set_label('Inbound population', size=12)\n",
    "\n",
    "ax.set_title(\"APT Price unit: 1,000KRW\", loc='right', size=10)\n",
    "ax.grid(True)\n",
    "\n",
    "# Add regression info (coef and p-value) as annotation, placed in axes coordinates\n",
    "# (top-left corner) so it does not move with the data limits.\n",
    "ax.text(\n",
    "    0.05, 0.95,\n",
    "    f\"Coef : {slope:.3f}\\nP-value : {p_value:.6f}\",  # fixed decimal format\n",
    "    transform=ax.transAxes,\n",
    "    fontsize=12,\n",
    "    verticalalignment='top',\n",
    "    bbox=dict(facecolor='white', alpha=0.6, edgecolor='none')\n",
    ")\n",
    "# Tighten margins automatically\n",
    "fig.tight_layout()\n",
    "\n",
    "# Save with bbox_inches='tight' to remove extra white space\n",
    "fig.savefig(\"Figure_1.jpg\", dpi=600, bbox_inches='tight')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b50778ff-3f11-4558-bd12-eb74c6e0194e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "c173bb81-bc62-4c4d-8c21-2f9be8f52e2a",
   "metadata": {},
   "source": [
    "# Figure 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3505e975-74b8-4822-84bb-a6a7405bbb6c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 3: two stacked panels -- (a) grouped bars built from FILE_COMP and\n",
    "# (b) one price line per region built from FILE_APT. Saved twice (PNG under BASE, JPG in cwd).\n",
    "\n",
    "# ================== PATHS ==================\n",
    "BASE = r\"E:\\불평등 연구\\데이터\\59_출퇴근불평등\\map\"\n",
    "FILE_COMP = \"df_companies.xlsx\"           # columns: Sido | size ∈ {Total, Conglomerate, SME} | number\n",
    "FILE_APT  = \"df_apt_price_change.xlsx\"    # columns: region | 2013 ... 2024 (wide by year)\n",
    "OUTPUT    = \"subplot_companies_apt.png\"\n",
    "# ===========================================\n",
    "\n",
    "# Korean font on Windows\n",
    "plt.rcParams['font.family'] = 'Malgun Gothic'\n",
    "plt.rcParams['axes.unicode_minus'] = False\n",
    "\n",
    "# ---------- Load data ----------\n",
    "df_comp = pd.read_excel(os.path.join(BASE, FILE_COMP))\n",
    "df_apt  = pd.read_excel(os.path.join(BASE, FILE_APT))\n",
    "\n",
    "# ---------- Prepare companies (grouped bars) ----------\n",
    "# One row per Sido, one column per size category that is actually present.\n",
    "pivot_comp = df_comp.pivot_table(index='Sido', columns='size', values='number', aggfunc='sum')\n",
    "cols_pref = [c for c in ['Total', 'Conglomerate', 'SME'] if c in pivot_comp.columns]\n",
    "pivot_comp = pivot_comp[cols_pref]\n",
    "\n",
    "# Order regions by 'Total' when available, otherwise by the row sum of what is present.\n",
    "if 'Total' in pivot_comp.columns:\n",
    "    pivot_comp = pivot_comp.sort_values(by='Total', ascending=False)\n",
    "else:\n",
    "    pivot_comp['_sum'] = pivot_comp.sum(axis=1, skipna=True)\n",
    "    pivot_comp = pivot_comp.sort_values(by='_sum', ascending=False).drop(columns=['_sum'])\n",
    "\n",
    "sido_order = pivot_comp.index.tolist()\n",
    "\n",
    "# ---------- Prepare APT prices (time series) ----------\n",
    "# Transpose so the former column headers become the index and each region a column.\n",
    "df_apt_wide = df_apt.set_index('region').T\n",
    "# Best effort: use integer labels on the x axis when every header converts cleanly;\n",
    "# otherwise keep the labels exactly as read from the file.\n",
    "try:\n",
    "    df_apt_wide.index = df_apt_wide.index.astype(int)\n",
    "except (TypeError, ValueError):\n",
    "    pass\n",
    "\n",
    "selected_regions = list(df_apt_wide.columns)  # all regions in file\n",
    "\n",
    "# ---------- Plot ----------\n",
    "fig, axes = plt.subplots(2, 1, figsize=(14, 10), constrained_layout=True)\n",
    "\n",
    "# ===== Subplot 1: Grouped bars with hatching =====\n",
    "ax0 = axes[0]\n",
    "data = pivot_comp.loc[sido_order, cols_pref].copy()\n",
    "n_groups = len(data.index)\n",
    "n_bars   = len(cols_pref)\n",
    "x = np.arange(n_groups)\n",
    "\n",
    "bar_width = 0.22 if n_bars == 3 else 0.28\n",
    "# Centre the bars of one group around its tick position.\n",
    "offsets = np.linspace(-bar_width*(n_bars-1)/2, bar_width*(n_bars-1)/2, n_bars)\n",
    "\n",
    "# Hatch patterns for each category\n",
    "hatches = ['/', 'o', '*', '\\\\', '.', 'x']\n",
    "\n",
    "# Tallest bar, used to offset the value labels. Computed once, ignoring missing cells\n",
    "# (a Sido without one of the size categories is NaN after the pivot).\n",
    "values_2d = data.to_numpy(dtype=float)\n",
    "ymax = np.nanmax(values_2d) if np.isfinite(values_2d).any() else 0.0\n",
    "\n",
    "bars_handles = []\n",
    "for i, col in enumerate(cols_pref):\n",
    "    vals = data[col].values\n",
    "    h = ax0.bar(x + offsets[i], vals, width=bar_width, hatch=hatches[i % len(hatches)])\n",
    "    bars_handles.append(h)\n",
    "    for rect, v in zip(h, vals):\n",
    "        if pd.isna(v):\n",
    "            continue  # nothing to label; int(NaN) would raise\n",
    "        ax0.text(rect.get_x() + rect.get_width()/2,\n",
    "                 rect.get_height() + (ymax * 0.02),\n",
    "                 f\"{int(v):,}\",\n",
    "                 ha='center', va='bottom', fontsize=12, rotation = 90)\n",
    "\n",
    "ax0.set_xticks(x)\n",
    "ax0.set_xticklabels(data.index, rotation=90, fontsize=17)\n",
    "# Pin the current tick positions before replacing their labels with thousands-separated text.\n",
    "ax0.set_yticks(ax0.get_yticks())\n",
    "ax0.set_yticklabels([f\"{int(tick):,}\" for tick in ax0.get_yticks()], fontsize=16)\n",
    "ax0.set_ylabel(\"Number of Workers (unit: 1,000)\", fontsize=15)\n",
    "ax0.set_title(\"(a) Employment by Firm Size and Region (Total / Conglomerate / SME)\", fontsize=19, pad=15)\n",
    "ax0.grid(axis='y', linestyle='--', alpha=0.5)\n",
    "ax0.legend([h[0] for h in bars_handles], cols_pref, fontsize=16, ncols=len(cols_pref), frameon=False)\n",
    "\n",
    "# ===== Subplot 2: Apartment price trends =====\n",
    "ax1 = axes[1]\n",
    "# Line styles and markers are cycled independently so neighbouring regions differ.\n",
    "styles  = ['-', '--', '-.', ':', (0, (1, 1)), (0, (3, 1, 1, 1)), (0, (5, 1))]\n",
    "markers = ['o', 's', '^', 'D', 'P', 'X', 'v', 'h', '*']\n",
    "\n",
    "for j, region in enumerate(selected_regions):\n",
    "    ls = styles[j % len(styles)]\n",
    "    mk = markers[j % len(markers)]\n",
    "    ax1.plot(df_apt_wide.index, df_apt_wide[region],\n",
    "             linestyle=ls, marker=mk, linewidth=2, markersize=8, label=region)\n",
    "\n",
    "ax1.set_xlabel(\"\", fontsize=14)\n",
    "ax1.set_ylabel(\"Average APT Price (10,000 KRW/m²)\", fontsize=15)\n",
    "ax1.set_title(\"(b) Apartment Price Trends\", fontsize=19, pad=15)\n",
    "ax1.tick_params(axis='x', labelsize=17)\n",
    "ax1.tick_params(axis='y', labelsize=16)\n",
    "ax1.grid(True, linestyle='--', alpha=0.5)\n",
    "ax1.legend(title=\"Region\", fontsize=16, title_fontsize=17, ncols=3, frameon=False)\n",
    "\n",
    "# Save & show\n",
    "plt.savefig(os.path.join(BASE, OUTPUT), dpi=600, bbox_inches='tight')\n",
    "plt.savefig(\"Figure_3.jpg\", dpi=600, bbox_inches='tight')\n",
    "\n",
    "plt.show()\n",
    "print(f\"Saved figure to: {os.path.join(BASE, OUTPUT)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4f1b6ab-9aea-471a-96fb-c8bee413c7d2",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "630707de-04d2-4e6f-8de7-df574b258688",
   "metadata": {},
   "source": [
    "# Figure 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b30aca34-4fb2-4031-99f4-782ee87b1a18",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the input table for the Figure 2 maps (df_dong_in_out.xlsx).\n",
    "dong_table_path = r'E:\\불평등 연구\\데이터\\59_출퇴근불평등\\map\\df_dong_in_out.xlsx'\n",
    "df = pd.read_excel(dong_table_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4b6d383-94cc-4b5a-8a34-209447dfa235",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Load the boundary JSON used as the map geometry\n",
    "\n",
    "geo_path = r\"E:\\불평등 연구\\데이터\\59_출퇴근불평등\\map\\행정동.json\"\n",
    "# Open through a context manager so the file handle is closed once parsing is done.\n",
    "with open(geo_path, encoding='utf-8') as geo_file:\n",
    "    geo_json = json.load(geo_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fbf03001-c4a3-4be3-9e81-6a7d9f56aaf9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map centre: column means of '위도' (latitude) and '경도' (longitude).\n",
    "lat, long = (df[column].mean() for column in ('위도', '경도'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14bd076e-5a6b-4e2d-99e4-d28a402c617d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Choropleth of the 'seoul_in_pop' column, centred on the mean coordinates (lat, long).\n",
    "m = folium.Map(location=[lat, long], tiles='cartodbpositron', zoom_start=10.5)\n",
    "\n",
    "# A marker-cluster layer is attached to the map; this cell adds no markers to it.\n",
    "marker_cluster = MarkerCluster()\n",
    "marker_cluster.add_to(m)\n",
    "\n",
    "# Fill colour for regions that have no value in the data\n",
    "nan_fill_color = 'white'\n",
    "\n",
    "choropleth_layer = folium.Choropleth(\n",
    "    geo_data=geo_json,                       # boundary geometry loaded from the JSON file\n",
    "    data=df,                                 # table holding the key and value columns\n",
    "    columns=['full_name', 'seoul_in_pop'],   # [key column, value column]\n",
    "    key_on='feature.properties.adm_nm',      # GeoJSON property matched against the key column\n",
    "    name='choropleth',\n",
    "    fill_color='Reds',                       # colour scale\n",
    "    fill_opacity=0.7,\n",
    "    line_opacity=0.2,\n",
    "    nan_fill_color=nan_fill_color,\n",
    "    legend_name=\"\",\n",
    "    bins=8,\n",
    "    smooth_factor=0,\n",
    "    highlight=True,\n",
    ")\n",
    "choropleth_layer.add_to(m)\n",
    "\n",
    "m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a6fe045-8b20-41a9-b179-b7e837ac7643",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aaed9a4d-a48d-468a-9c61-9932548580a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Choropleth of the 'seoul_in_time' column, centred on the mean coordinates (lat, long).\n",
    "m = folium.Map(location=[lat, long], tiles='cartodbpositron', zoom_start=10.5)\n",
    "\n",
    "# A marker-cluster layer is attached to the map; this cell adds no markers to it.\n",
    "marker_cluster = MarkerCluster()\n",
    "marker_cluster.add_to(m)\n",
    "\n",
    "# Fill colour for regions that have no value in the data\n",
    "nan_fill_color = 'white'\n",
    "\n",
    "choropleth_layer = folium.Choropleth(\n",
    "    geo_data=geo_json,                       # boundary geometry loaded from the JSON file\n",
    "    data=df,                                 # table holding the key and value columns\n",
    "    columns=['full_name', 'seoul_in_time'],  # [key column, value column]\n",
    "    key_on='feature.properties.adm_nm',      # GeoJSON property matched against the key column\n",
    "    name='choropleth',\n",
    "    fill_color='Purples',                    # colour scale\n",
    "    fill_opacity=0.7,\n",
    "    line_opacity=0.2,\n",
    "    nan_fill_color=nan_fill_color,\n",
    "    legend_name=\"\",\n",
    "    bins=8,\n",
    "    smooth_factor=0,\n",
    "    highlight=True,\n",
    ")\n",
    "choropleth_layer.add_to(m)\n",
    "\n",
    "m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e2d61bd-67ab-4c23-a1ee-ead19a9844dd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
